#Replication Code for Persuading U.S. White evangelicals to vaccinate for COVID-19: Testing message effectiveness in fall 2020 and spring 2021

#Import packages 
library(tidyverse)
library(haven)
library(estimatr)
library(dotwhisker)
library(dplyr)
library(jtools)
library(huxtable)
library(officer)
library(flextable)

#Set your working directory here

######Study 1#######
Study1_data <- read_dta("ReplicationData_Study1.dta")

#format covariates as factors
Study1_data$age5 <- factor(Study1_data$age5)
Study1_data$educ <- factor(Study1_data$educ)
Study1_data$pid7_nomiss <- factor(Study1_data$pid7_nomiss)


#Demographic information as reported in Supplementary Information
prop.table(table(Study1_data$female)) 
age <- 2020 - Study1_data$birthyr
median(age)
quantile(age, c(0.25,0.75))
prop.table(table(Study1_data$educ))
prop.table(table(Study1_data$pid7_nomiss))

#Vaccine Confidence
(Study1_data %>% 
  summarise(vacconf_scale_mean = mean(vacconf_scale, na.rm = TRUE), vacconf_scale_sd = sd(vacconf_scale, na.rm = TRUE)))
#Flu Shot
(Study1_data %>% 
  summarise(prior_flushot_mean = mean(prior_flushot, na.rm = TRUE), prior_flushot_sd = sd(prior_flushot, na.rm = TRUE)))


######Estimate Models Study 1#######
#Estimate Vaccine Intention at 3 Months and test whether C.I.R. outperforms the information condition
m1 <- lh_robust(vaccinelikely_3month ~ treat_info + treat_comm + treat_commembarass + treat_notbravery + treat_science + treat_freedom + age5 + female + faminc5 + income_miss + educ + pid7_nomiss + workathome + workoutsidechoice + workoutsidehaveto + prior_flushot + vacconf_scale + anyhealthrisk, data = Study1_data, linear_hypothesis = "treat_commembarass -treat_info  = 0", se_type = "stata" )

#Estimate Advise a Friend and test whether C.I.R. outperforms the information condition
m2 <- lh_robust(vaccine_advisefriend ~ treat_info + treat_comm + treat_commembarass + treat_notbravery + treat_science + treat_freedom + age5 + female + faminc5 + income_miss + educ + pid7_nomiss + workathome + workoutsidechoice + workoutsidehaveto + prior_flushot + vacconf_scale + anyhealthrisk, data = Study1_data, linear_hypothesis = "treat_commembarass -treat_info  = 0", se_type = "stata")

#Estimate Judgment of Non-Vaccinator and test whether C.I.R. outperforms the information condition
m3 <- lh_robust(judging_nonvacscale ~ treat_info + treat_comm + treat_commembarass + treat_notbravery + treat_science + treat_freedom + age5 + female + faminc5 + income_miss + educ + pid7_nomiss + workathome + workoutsidechoice + workoutsidehaveto + prior_flushot + vacconf_scale + anyhealthrisk, data = Study1_data, linear_hypothesis = "treat_commembarass -treat_info  = 0", se_type = "stata")

######MAKE TABLE S1#######
#Extract the model from the lh_robust objects
m1_export <- m1$lm_robust
m2_export <- m2$lm_robust
m3_export <- m3$lm_robust

#Export Supplementary Table to Excel (Table is posted in suppelementary document at: https://osf.io/b5rm9/?view_only=10a049d1aebe49a99529472619789fba)
export_summs(m1_export, m2_export, m3_export,
             model.names = c("(1) Vaccine 3 Months", "(2) Advise a Friend", " (3)Judging Non-Vaccinator"),
             coefs =c("Baseline Info." = "treat_info", "C.I.R." = "treat_comm", "C.I.R. + Embarrassment" = "treat_commembarass", "Not Bravery" = "treat_notbravery", "Trust in Science" = "treat_science", "Personal Freedom" = "treat_freedom", "Age (31-45)" = "age52", "Age (46-60)" = "age53", "Age (61-75)" = "age54", "Age (76+)" = "age55", "Female" = "female", "Income" = "faminc5", "Missing Income" = "income_miss", "HS Graduate" = "educ2", "Some College" = "educ3", "2-year Degree" = "educ4", "4-year Degree" = "educ5", "Post-graduate Degree" = "educ6", "Weak Democrat" = "pid7_nomiss2", "Lean Democrat" = "pid7_nomiss3", "Independent/Other" = "pid7_nomiss4", "Lean Republican" = "pid7_nomiss5", "Weak Republican" = "pid7_nomiss6", "Strong Republican" = "pid7_nomiss7", "Working at Home" = "workathome", "Working Outside Home, Choice" = "workoutsidechoice", "Working Outside Home, Required" = "workoutsidehaveto", "Prior Flu Shot (0-5)" = "prior_flushot", "Vaccine Confidence (0-1)" = "vacconf_scale", "Any COVID-19 Risk Factor" = "anyhealthrisk", "Constant" = "(Intercept)"),
             statistics = c("N" = "nobs", "R2" = "r.squared"),
             digits = 3,
             to.file = "xlsx",
             file.name = "TableS1.xlsx")


######Study 2#######
#Import Study 2 data
Study2_data <- read_dta("ReplicationData_Study2.dta")

#Convert covariates to factors
Study2_data$c_age <- factor(Study2_data$c_age)
Study2_data$c_gender <- factor(Study2_data$c_gender)
Study2_data$c_educ <- factor(Study2_data$educ)
Study2_data$c_pid <- factor(Study2_data$c_pid)


#Recode covariates to match Study 1 coding
Study2_data <- Study2_data %>% mutate(c_female = (c_gender == 4))
Study2_data <- Study2_data %>% mutate(c_incmiss = (faminc5 == "Prefer not to say"))
Study2_data$c_educ <- relevel(Study2_data$c_educ, ref = "No HS")

#Demographic information as reported in Supplementary Information
prop.table(table(Study2_data$c_female)) 
age2 <- 2020 - Study2_data$birthyr
median(age2)
quantile(age2, c(0.25,0.75))
prop.table(table(Study2_data$c_educ))
prop.table(table(Study2_data$c_pid))

(Study2_data %>% 
  summarise(c_confscale_mean = mean(c_confscale, na.rm = TRUE), c_confscale_sd = sd(c_confscale, na.rm = TRUE)))

(Study2_data %>% 
  summarise(fluvaccine_mean = mean(fluvaccine, na.rm = TRUE), fluvaccine_sd = sd(fluvaccine, na.rm = TRUE)))

######Estimate Models Study 2#######
#Estimate vaccine likely within 3 months
m4 <- lm_robust(out_likely ~  treat_comm + treat_commembarass + treat_trump + treat_lib + treat_dr + c_age + c_female + c_educ + c_pid + c_faminc + c_incmiss + fluvaccine + c_anyhealth + c_confscale, data = Study2_data, se_type = "stata")
#Estimate advise a friend
m5 <- lm_robust(out_friend ~  treat_comm + treat_commembarass  + treat_trump + treat_lib + treat_dr + c_age + c_female + c_educ + c_pid + c_faminc + c_incmiss + fluvaccine + c_anyhealth + c_confscale, data = Study2_data, se_type = "stata")
#Estimate judging non-vaccinator
m6 <- lm_robust(out_judge_scale ~ treat_comm + treat_commembarass + treat_trump + treat_lib + treat_dr + c_age + c_female + c_educ + c_pid + c_faminc + c_incmiss + fluvaccine + c_anyhealth + c_confscale, data = Study2_data, se_type = "stata")

######MAKE TABLE S2 (as appears at OSF link above#######
export_summs(m4, m5, m6,
             model.names = c("(1) Vaccine 3 Months", "(2) Advise a Friend", " (3)Judging Non-Vaccinator"),
             coefs =c("C.I.R." = "treat_comm", "C.I.R. + Embarrassment" = "treat_commembarass", "Trump Hero" = "treat_trump", "Restore Liberty" = "treat_lib", "Doctors" = "treat_dr", "Age (31-45)" = "c_age2", "Age (46-60)" = "c_age3", "Age (61-75)" = "c_age4", "Age (76+)" = "c_age5", "Female" = "c_femaleTRUE", "Income" = "c_faminc", "Missing Income" = "c_incmissTRUE", "HS Graduate" = "c_educHigh school graduate", "Some College" = "c_educSome college", "2-year Degree" = "c_educ2-year", "4-year Degree" = "c_educ4-year", "Post-graduate Degree" = "c_educPost-grad", "Weak Democrat" = "c_pid2", "Lean Democrat" = "c_pid3", "Independent/Other" = "c_pid4", "Lean Republican" = "c_pid5", "Weak Republican" = "c_pid6", "Strong Republican" = "c_pid7", "Prior Flu Shot (0-5)" = "fluvaccine", "Vaccine Confidence (0-1)" = "c_confscale", "Any COVID-19 Risk Factor" = "c_anyhealth", "Constant" = "(Intercept)"),
             statistics = c("N" = "nobs", "R2" = "r.squared"),
             digits = 3,
             to.file = "xlsx",
             file.name = "TableS2.xlsx")


######MAKE FIGURE 1#######
#This code strips out the covariates from the Study 1 models and keeps the treatment effects
m1_a <- tidy(m1$lm_robust) %>%
  filter(grepl('treat*', term))

m2_a <- tidy(m2$lm_robust) %>%
  filter(grepl('treat*', term))

m3_a <- tidy(m3$lm_robust) %>%
  filter(grepl('treat*', term))

#Add in this variable to identify which model it is
m1_a <- m1_a %>% mutate(model = "Vaccine 3 Mos.")
m2_a <- m2_a %>% mutate(model = "Advise Friend") 
m3_a <- m3_a %>% mutate(model = "Judging Non-Vaccinator")

#Bind together all of the models and remove elements that are not necessary for figure          
Exp1models <- rbind(m1_a,m2_a,m3_a)
Exp1models <- Exp1models %>% select(-c(statistic, p.value, df, std.error,outcome))
#Change treatment strings to be informative labels
Exp1models$term <- str_replace_all(Exp1models$term, c("treat_info" = "Baseline Info.", "treat_commembarass" = "C.I.R. + Embarrassment", "treat_notbravery" = "Not Bravery", treat_science = "Trust in Science", "treat_freedom" = "Personal Freedom", "treat_comm" = "Community Interest & Reciprocity"))

#This code similarly cleans up the models for Study 2
#This code strips out the covariates from the Study 1 models and keeps the treatment effects
m4_a <- tidy(m4) %>%
  filter(grepl('treat*', term))

m5_a <- tidy(m5) %>%
  filter(grepl('treat*', term))

m6_a <- tidy(m6) %>%
  filter(grepl('treat*', term))

#Add in variable to identify which model it is
m4_a <- m4_a %>% mutate(model = "Vaccine 3 Mos.")
m5_a <- m5_a %>% mutate(model = "Advise Friend") 
m6_a <- m6_a %>% mutate(model = "Judging Non-Vaccinator")

#Bind together the models and remove all of the things that are not necessary for ploting         
Exp2models <- rbind(m4_a,m5_a,m6_a)
Exp2models <- Exp2models %>% select(-c(statistic, p.value, df, conf.low,conf.high,outcome))
#Change treatment strings to be informative labels
Exp2models$term <- str_replace_all(Exp2models$term, c("treat_commembarass" = "C.I.R. + Embarrassment", "treat_dr" = "Doctors", treat_lib = "Restore Liberty", "treat_trump" = "Trump Hero", "treat_comm" = "Community Interest & Reciprocity"))

#Import library needed for combining figures
library(ggpubr)

Fig1_LP_Exp1 <- dwplot(Exp1models,
                       vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2), 
                       #dot_args = list(aes(shape = model)),
                       whisker_args = list(aes(linetype = model))) +
  theme_bw() + xlab("Coefficient Estimate") + ylab("")+
  theme(
    legend.title.align = .5)+
  scale_colour_grey(start = .3, end = .7,
                    name = "Outcome",
                    breaks = c("Judging Non-Vaccinator", "Advise Friend", "Vaccine 3 Mos." ))

Fig1_RP_Exp2 <- dwplot(Exp2models,
                       vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2), 
                       #dot_args = list(aes(shape = model)),
                       whisker_args = list(aes(linetype = model))) +
  theme_bw() + xlab("Coefficient Estimate") + ylab("")+
  theme(
    legend.title.align = .5)+
  scale_colour_grey(start = .3, end = .7,
                    name = "Outcome",
                    breaks = c("Judging Non-Vaccinator", "Advise Friend", "Vaccine 3 Mos." ))


Figure1_twopanel <- ggarrange(Fig1_LP_Exp1, Fig1_RP_Exp2,
                              labels = c("Study 1", "Study 2"),
                              ncol = 2, nrow = 1,
                              common.legend = TRUE, legend= "bottom")

ggsave("Fig1_TwoPanel2.tiff", plot = Figure1_twopanel)

#####Reweighting Analysis from Discussion#####
### Create the bins in Study 2 data that we are going to match to
Study2_data$w_age3 <- recode_factor(Study2_data$c_age, `1`="1", `2`="1", `3`="2", `4`="2", `5`="3")

Study2_data$w_gender <- recode_factor(Study2_data$c_gender, `1`="1", `3`="1", `4`="2")

Study2_data$w_educ <- recode_factor(Study2_data$c_educ, `No HS`="0", `High school graduate`="0", `Some college`="1", `2-year`="1", `4-year`="2",  `Post-grad`="2")

Study2_data$w_pid <- recode_factor(Study2_data$c_pid, `2` = "0", `1` = "1", `3` = "2", `5` = "0", `6` = "2", `7` = "0", `8` = "2")

Study2_data <-
  Study2_data %>%
  mutate(w_highconf = ifelse(c_confscale < .5, "0", "1"))

Study2_data$w_fluvax <- recode_factor(Study2_data$fluvaccine, `0`="0", `1`="1", `2`="1", `3`="1", `4`="2", `5`="2")

#Creates identical bins for study 1
Study1_data$w_age3 <- recode_factor(Study1_data$age5, `1`="1", `2`="1", `3`="2", `4`="2", `5`="3")

Study1_data$w_gender <- as.factor((Study1_data$female + 1))

Study1_data$w_educ <- recode_factor(Study1_data$educ, `1`="0", `2`="0", `3`="1", `4`="1", `5`="2",  `6`="2")

Study1_data$w_pid <- recode(Study1_data$pid7_nomiss, `1` = "0", `2` = "0", `3` = "0", `4` = "1", `5` = "2", `6` = "2", `7` = "2")

Study1_data <-
  Study1_data %>%
  mutate(w_highconf = ifelse(vacconf_scale < .5, "0", "1"))

Study1_data$w_fluvax <- recode_factor(Study1_data$prior_flushot, `0`="0", `1`="1", `2`="1", `3`="1", `4`="2", `5`="2")

#Get the survey package to construct weights
library(survey)
##make Study 1 a survey object
study1 <- svydesign(id = ~1, data = Study1_data)
#2 respondents missing on vaccine confidence
study1 <- na.omit(study1)


#Get the tables of frequencies and save them as distributions
prop.table(table(Study2_data$w_age3))
age.dist <- data.frame(w_age3 = c("1", "2","3"),
                       Freq = nrow(study1) * c(0.2525837, 0.4923522, 0.2550641))

prop.table(table(Study2_data$w_gender))
gender.dist <- data.frame(w_gender = c("1", "2"),
                          Freq = nrow(study1) * c(0.4109136, 0.5890864 ))

prop.table(table(Study2_data$w_educ))
educ.dist <- data.frame(w_educ = c("0","1","2"),
                        Freq = nrow(study1) * c(0.2852418, 0.4266226, 0.2881356))

prop.table(table(Study2_data$w_pid))
pid.dist <- data.frame(w_pid = c("0","1","2"),
                       Freq = nrow(study1) * c(0.05291443, 0.12980570, 0.81727987))

prop.table(table(Study2_data$w_highconf))
highconf.dist <- data.frame(w_highconf = c("0","1"),
                            Freq = nrow(study1) * c(0.5227461, 0.4772539))

prop.table(table(Study2_data$w_fluvax))
fluvax.dist <- data.frame(w_fluvax = c("0","1","2"),
                          Freq = nrow(study1) * c(0.5622158, 0.2327408, 0.2050434 ))

#Use the rake function to create the actual weights in the data for study 1
w_study1 <- rake(study1, sample.margins =  list(~w_age3,~w_gender,~w_educ,~w_pid,~w_highconf,~w_fluvax), population.margins = list(age.dist,gender.dist,educ.dist,pid.dist,highconf.dist,fluvax.dist))
wgts<- data.frame(w_study1$variables$id,weights(w_study1))
#rename the id to match the source data and rename the weights to specify that they are not YouGov weights so they can be merged
wgts <- rename(wgts, id = w_study1.variables.id, weightsToStudy2 = weights.w_study1.)
wgts$id <- as.numeric(wgts$id)

#Merge the survey weights into the Study 1 data
Study1_data$id <- as.numeric(Study1_data$id)
Study1_data <- merge(Study1_data,wgts,by="id")

#Perform the reweighted analysis that appears in the discussion
w_m1 <- lh_robust(vaccinelikely_3month ~ treat_info + treat_comm + treat_commembarass + treat_notbravery + treat_science + treat_freedom + age5 + female + faminc5 + income_miss + educ + pid7_nomiss + workathome + workoutsidechoice + workoutsidehaveto + prior_flushot + vacconf_scale + anyhealthrisk, data = Study1_data, linear_hypothesis = "treat_commembarass -treat_info  = 0", se_type = "stata", weights = weightsToStudy2)

w_m2 <- lh_robust(vaccine_advisefriend ~ treat_info + treat_comm + treat_commembarass + treat_notbravery + treat_science + treat_freedom + age5 + female + faminc5 + income_miss + educ + pid7_nomiss + workathome + workoutsidechoice + workoutsidehaveto + prior_flushot + vacconf_scale + anyhealthrisk, data = Study1_data, linear_hypothesis = "treat_commembarass -treat_info  = 0", se_type = "stata", weights = weightsToStudy2)

w_m3 <- lh_robust(judging_nonvacscale ~ treat_info + treat_comm + treat_commembarass + treat_notbravery + treat_science + treat_freedom + age5 + female + faminc5 + income_miss + educ + pid7_nomiss + workathome + workoutsidechoice + workoutsidehaveto + prior_flushot + vacconf_scale + anyhealthrisk, data = Study1_data, linear_hypothesis = "treat_commembarass -treat_info  = 0", se_type = "stata", weights = weightsToStudy2)

####Create Table S3 of the Weighted Analysis (as appears at OSF link above)####
w1_export <- w_m1$lm_robust
w2_export <- w_m2$lm_robust
w3_export <- w_m3$lm_robust

export_summs(w1_export, w2_export, w3_export,
             model.names = c("(1) Vaccine 3 Months", "(2) Advise a Friend", " (3)Judging Non-Vaccinator"),
             coefs =c("Baseline Info." = "treat_info", "C.I.R." = "treat_comm", "C.I.R. + Embarrassment" = "treat_commembarass", "Not Bravery" = "treat_notbravery", "Trust in Science" = "treat_science", "Personal Freedom" = "treat_freedom", "Age (31-45)" = "age52", "Age (46-60)" = "age53", "Age (61-75)" = "age54", "Age (76+)" = "age55", "Female" = "female", "Income" = "faminc5", "Missing Income" = "income_miss", "HS Graduate" = "educ2", "Some College" = "educ3", "2-year Degree" = "educ4", "4-year Degree" = "educ5", "Post-graduate Degree" = "educ6", "Weak Democrat" = "pid7_nomiss2", "Lean Democrat" = "pid7_nomiss3", "Independent/Other" = "pid7_nomiss4", "Lean Republican" = "pid7_nomiss5", "Weak Republican" = "pid7_nomiss6", "Strong Republican" = "pid7_nomiss7", "Working at Home" = "workathome", "Working Outside Home, Choice" = "workoutsidechoice", "Working Outside Home, Required" = "workoutsidehaveto", "Prior Flu Shot (0-5)" = "prior_flushot", "Vaccine Confidence (0-1)" = "vacconf_scale", "Any COVID-19 Risk Factor" = "anyhealthrisk", "Constant" = "(Intercept)"),
             statistics = c("N" = "nobs", "R2" = "r.squared"),
             digits = 3,
             to.file = "xlsx",
             file.name = "TableS3.xlsx")
```